home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Magnum One
/
Magnum One (Mid-American Digital) (Disc Manufacturing).iso
/
d12
/
portablc.arc
/
FILECMPR.C
< prev
next >
Wrap
Text File
|
1990-08-30
|
17KB
|
482 lines
/*************************************************************************
filecmpr.c : Compresses both ASCII and Binary Files
Originally secured from a DOS Bulletin Board. This is totally free Shareware.
Changed much of the original Dos file to work under Unix/Xenix System 5;
Cleaned up the code to make it more readable and maintainable;
Added logic to allow decompressing a file onto itself & added many edits;
I attached original author documentation below of the original program.
Note that this program was originally secured for use with the upload/down-
load system, however, anyone can use this to compress/uncompress any file.
This also has been tested under DOS using Microsoft Version 5.1.
12/8/89 Martin Katz
**************************************************************************
Original Author Documention:
LZSS.C -- A Data Compression Program
4/6/1989 Haruhiko Okumura
Use, distribute, and modify this program freely.
Please send me your improved versions.
PC-VAN SCIENCE
NIFTY-Serve PAF01022
CompuServe 74050,1022
The algorithm is quite simple:
Keep a ring buffer, which initially contains "space" characters
only. Read several letters from the file to the buffer. Then search
the buffer for the longest string that matches the letters just read,
and send its length and position in the buffer.
If the buffer size is 4096 bytes, the position can be encoded in 12
bits. If we represent the match length in four bits, the <position,
length> pair is two bytes long. If the longest match is no more than
two characters, then we send just one character without encoding, and
restart the process with the next letter. We must send one extra bit
each time to tell the decoder whether we are sending a <position,
length> pair or an unencoded character.
The accompanying file LZSS.C is a version of this algorithm. This
implementation uses multiple binary trees to speed up the search for the
longest match. All the programs in this article are written in
draft-proposed ANSI C. I tested them with Turbo C 2.0.
*************************************************************************/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define N 4096 /* size of ring buffer */
#define F 18 /* upper limit for match_length */
#define THRESHOLD 2 /* encode string into position and length
if match_length is greater than this */
#define NIL N /* index for root of binary search trees */
#ifdef TWO60
#define CLEAR printf("\033X")
#else
#define CLEAR printf("\033[H\033[2J\n")
#endif
#define BELL printf("\007\n")
unsigned long int textsize = 0, /* text size counter */
codesize = 0, /* code size counter */
printcount = 0; /* counter reporting progress every 1K bytes*/
unsigned char text_buf[N + F - 1]; /* ring buffer of size N, with extra F-1
bytes to facilitate string comparison */
int match_position, match_length, /* of longest match. These are set by the
InsertNode() procedure. */
lson[N + 1], rson[N + 257], dad[N + 1]; /* left & right children & parents,
These constitute binary search trees. */
FILE *infile, *outfile; /* input & output files */
void InitTree() /* initialize trees */
{
int i;
/* For i = 0 to N - 1, rson[i] and lson[i] will be the right and
left children of node i. These nodes need not be initialized.
Also, dad[i] is the parent of node i. These are initialized to
NIL (= N), which stands for 'not used.'
For i = 0 to 255, rson[N + i + 1] is the root of the tree
for strings that begin with character i. These are initialized
to NIL. Note there are 256 trees. */
for (i = N + 1; i <= N + 256; i++)
rson[i] = NIL;
for (i = 0; i < N; i++)
dad[i] = NIL;
}
void InsertNode(r)
int r;
/* Inserts string of length F, text_buf[r..r+F-1], into one of the
trees (text_buf[r]'th tree) and returns the longest-match position
and length via the global variables match_position and match_length.
If match_length = F, then removes the old node in favor of the new
one, because the old one will be deleted sooner.
Note r plays double role, as tree node and position in buffer. */
{
int i, p, cmp;
unsigned char *key;
cmp = 1;
key = &text_buf[r];
p = N + 1 + key[0];
rson[r] = lson[r] = NIL;
match_length = 0;
for ( ; ; )
{
if (cmp >= 0)
{
if (rson[p] != NIL)
p = rson[p];
else
{
rson[p] = r;
dad[r] = p;
return;
}
}
else
{
if (lson[p] != NIL)
p = lson[p];
else
{
lson[p] = r;
dad[r] = p;
return;
}
}
for (i = 1; i < F; i++)
if ((cmp = key[i] - text_buf[p + i]) != 0)
break;
if (i > match_length)
{
match_position = p;
if ((match_length = i) >= F)
break;
}
}
dad[r] = dad[p];
lson[r] = lson[p];
rson[r] = rson[p];
dad[lson[p]] = r;
dad[rson[p]] = r;
if (rson[dad[p]] == p)
rson[dad[p]] = r;
else
lson[dad[p]] = r;
dad[p] = NIL; /* remove p */
}
void DeleteNode(p) /* deletes node p from tree */
int p;
{
int q;
if (dad[p] == NIL)
return; /* not in tree */
if (rson[p] == NIL)
q = lson[p];
else if (lson[p] == NIL)
q = rson[p];
else
{
q = lson[p];
if (rson[q] != NIL)
{
do
{
q = rson[q];
}
while (rson[q] != NIL);
rson[dad[q]] = lson[q];
dad[lson[q]] = dad[q];
lson[q] = lson[p];
dad[lson[p]] = q;
}
rson[q] = rson[p];
dad[rson[p]] = q;
}
dad[q] = dad[p];
if (rson[dad[p]] == p)
rson[dad[p]] = q;
else
lson[dad[p]] = q;
dad[p] = NIL;
}
void Encode()
{
int i, c, len, r, s, last_match_length, code_buf_ptr;
unsigned char code_buf[17], mask;
InitTree(); /* initialize trees */
code_buf[0] = 0; /* code_buf[1..16] saves eight units of code, and
code_buf[0] works as eight flags, "1" representing that the unit
is an unencoded letter (1 byte), "0" a position-and-length pair
(2 bytes). Thus, eight units require at most 16 bytes of code.*/
code_buf_ptr = mask = 1;
s = 0; r = N - F;
for (i = s; i < r; i++)
text_buf[i] = ' '; /* Clear the buffer with
any character that will appear often. */
for (len = 0; len < F && (c = getc(infile)) != EOF; len++)
text_buf[r + len] = c; /* Read F bytes into the last F bytes of
the buffer */
if ((textsize = len) == 0)
return; /* text of size zero */
for (i = 1; i <= F; i++)
InsertNode(r - i); /* Insert the F strings,
each of which begins with one or more 'space' characters. Note
the order in which these strings are inserted. This way,
degenerate trees will be less likely to occur. */
InsertNode(r); /* Finally, insert the whole string just read. The
global variables match_length and match_position are set. */
do
{
if (match_length > len)
match_length = len; /* match_length
may be spuriously long near the end of text. */
if (match_length <= THRESHOLD)
{
match_length = 1; /* Not long enough match. Send one byte. */
code_buf[0] |= mask; /* 'send one byte' flag */
code_buf[code_buf_ptr++] = text_buf[r]; /* Send uncoded. */
}
else
{
code_buf[code_buf_ptr++] = (unsigned char) match_position;
code_buf[code_buf_ptr++] = (unsigned char)
/* Send position and length pair. Note match_length > THRESHOLD. */
(((match_position >> 4) & 0xf0) | (match_length - (THRESHOLD + 1)));
}
if ((mask <<= 1) == 0) /* Shift mask left one bit. */
{
for (i = 0; i < code_buf_ptr; i++) /* Send at most 8 units of */
putc(code_buf[i], outfile); /* code together */
codesize += code_buf_ptr;
code_buf[0] = 0;
code_buf_ptr = mask = 1;
}
last_match_length = match_length;
for (i = 0; i < last_match_length && (c = getc(infile)) != EOF; i++)
{
DeleteNode(s); /* Delete old strings and */
text_buf[s] = c; /* read new bytes */
if (s < F - 1)
text_buf[s + N] = c; /* If the position is
near the end of buffer, extend the buffer to make
string comparison easier. */
s = (s + 1) & (N - 1);
r = (r + 1) & (N - 1);
/* Since this is a ring buffer, increment the position modulo N. */
InsertNode(r); /* Register the string in text_buf[r..r+F-1] */
}
if ((textsize += i) > printcount)
{
printf("%12ld\r", textsize);
printcount += 1024;
/* Reports progress each time the textsize exceeds
multiples of 1024. */
}
while (i++ < last_match_length)
{ /* After the end of text, */
DeleteNode(s); /* no need to read, but */
s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
if (--len)
InsertNode(r); /* buffer may not be empty. */
}
} while (len > 0); /* until length of string to be processed is zero */
if (code_buf_ptr > 1) /* Send remaining code. */
{
for (i = 0; i < code_buf_ptr; i++)
putc(code_buf[i], outfile);
codesize += code_buf_ptr;
}
printf("In : %ld bytes\n", textsize); /* Encoding is done. */
printf("Out: %ld bytes\n", codesize);
printf("Out/In: %.3f\n", (double)codesize / textsize);
}
void Decode() /* Just the reverse of Encode(). */
{
int i, j, k, r, c;
unsigned int flags;
for (i = 0; i < N - F; i++)
text_buf[i] = ' ';
r = N - F;
flags = 0;
for ( ; ; )
{
if (((flags >>= 1) & 256) == 0)
{
if ((c = getc(infile)) == EOF)
break;
flags = c | 0xff00; /* uses higher byte cleverly */
} /* to count eight */
if (flags & 1)
{
if ((c = getc(infile)) == EOF)
break;
putc(c, outfile);
text_buf[r++] = c;
r &= (N - 1);
}
else
{
if ((i = getc(infile)) == EOF)
break;
if ((j = getc(infile)) == EOF)
break;
i |= ((j & 0xf0) << 4);
j = (j & 0x0f) + THRESHOLD;
for (k = 0; k <= j; k++)
{
c = text_buf[(i + k) & (N - 1)];
putc(c, outfile);
text_buf[r++] = c;
r &= (N - 1);
}
}
}
}
void usage()
{
CLEAR;
printf("filecmpr: Four Arguments are required as exemplified:\n");
printf("'filecmpr c file1 file2' compresses file1 into file2.\n");
printf("'filecmpr d file2 file1' decompresses file2 into file1.\n");
BELL;
}
arg_check(argc, argv)
int argc;
char *argv[];
{
if (argc != 4)
{
usage();
return(-1);
}
if ( strpbrk(argv[1], "DCdc") == NULL )
{
usage();
return(-1);
}
if ( (strcmp(argv[3],argv[2]) == 0) && (strpbrk(argv[1], "Cc") != NULL) )
{
CLEAR;
printf("file_compress: Cannot compress a file onto itself.\n");
BELL;
return(-1);
}
return(0);
}
main(argc, argv)
int argc;
char *argv[];
{
char *to_file;
char cmnd[100], real_to_file_name[15];
short overlay_file = 0;
if (arg_check(argc, argv))
exit(-1);
if ( strcmp(argv[3],argv[2]) == 0 )
{
overlay_file = 1;
to_file = "interim_file";
strcpy(real_to_file_name,argv[3]);
}
else
to_file = argv[3];
if (( infile = fopen(argv[2], "r+b")) == NULL )
{
CLEAR;
printf("file_compress: unable to open from file %s\n", argv[2]);
BELL;
return(-1);
}
if (( outfile = fopen(to_file, "w+b")) == NULL )
{
CLEAR;
printf("file_compress: unable to open to file %s\n", to_file);
BELL;
return(-1);
}
if (toupper(*argv[1]) == 'C')
{
printf("\n-->Compressing %s into %s<--\n", argv[2], to_file);
Encode();
}
else
{
printf("\n-->Uncompressing %s<--\n", to_file);
Decode();
}
fclose(infile);
fclose(outfile);
if ( overlay_file == 1 )
{
#ifdef DOS
sprintf(cmnd,"copy interim_file %s", real_to_file_name);
#else
sprintf(cmnd,"mv interim_file %s", real_to_file_name);
#endif
system(cmnd);
#ifdef DOS
system("del interim_file");
#endif
}
return(0);
}